2018-05-06
AirPassengers dataAirPassengers
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec ## 1949 112 118 132 129 121 135 148 148 136 119 104 118 ## 1950 115 126 141 135 125 149 170 170 158 133 114 140 ## 1951 145 150 178 163 172 178 199 199 184 162 146 166 ## 1952 171 180 193 181 183 218 230 242 209 191 172 194 ## 1953 196 196 236 235 229 243 264 272 237 211 180 201 ## 1954 204 188 235 227 234 264 302 293 259 229 203 229 ## 1955 242 233 267 269 270 315 364 347 312 274 237 278 ## 1956 284 277 317 313 318 374 413 405 355 306 271 306 ## 1957 315 301 356 348 355 422 465 467 404 347 305 336 ## 1958 340 318 362 348 363 435 491 505 404 359 310 337 ## 1959 360 342 406 396 420 472 548 559 463 407 362 405 ## 1960 417 391 419 461 472 535 622 606 508 461 390 432
AirPassengers dataplot(AirPassengers)
air <- data.frame(passengers = as.numeric(AirPassengers),
year = rep(1949:1960, each = 12),
month = factor(rep(1:12, 12)))
air$time <- 1:nrow(air)
air
## passengers year month time ## 1 112 1949 1 1 ## 2 118 1949 2 2 ## 3 132 1949 3 3 ## 4 129 1949 4 4 ## 5 121 1949 5 5 ## 6 135 1949 6 6 ## 7 148 1949 7 7 ## 8 148 1949 8 8 ## 9 136 1949 9 9 ## 10 119 1949 10 10 ## 11 104 1949 11 11 ## 12 118 1949 12 12 ## 13 115 1950 1 13 ## 14 126 1950 2 14 ## 15 141 1950 3 15 ## 16 135 1950 4 16 ## 17 125 1950 5 17 ## 18 149 1950 6 18 ## 19 170 1950 7 19 ## 20 170 1950 8 20 ## 21 158 1950 9 21 ## 22 133 1950 10 22 ## 23 114 1950 11 23 ## 24 140 1950 12 24 ## 25 145 1951 1 25 ## 26 150 1951 2 26 ## 27 178 1951 3 27 ## 28 163 1951 4 28 ## 29 172 1951 5 29 ## 30 178 1951 6 30 ## 31 199 1951 7 31 ## 32 199 1951 8 32 ## 33 184 1951 9 33 ## 34 162 1951 10 34 ## 35 146 1951 11 35 ## 36 166 1951 12 36 ## 37 171 1952 1 37 ## 38 180 1952 2 38 ## 39 193 1952 3 39 ## 40 181 1952 4 40 ## 41 183 1952 5 41 ## 42 218 1952 6 42 ## 43 230 1952 7 43 ## 44 242 1952 8 44 ## 45 209 1952 9 45 ## 46 191 1952 10 46 ## 47 172 1952 11 47 ## 48 194 1952 12 48 ## 49 196 1953 1 49 ## 50 196 1953 2 50 ## 51 236 1953 3 51 ## 52 235 1953 4 52 ## 53 229 1953 5 53 ## 54 243 1953 6 54 ## 55 264 1953 7 55 ## 56 272 1953 8 56 ## 57 237 1953 9 57 ## 58 211 1953 10 58 ## 59 180 1953 11 59 ## 60 201 1953 12 60 ## 61 204 1954 1 61 ## 62 188 1954 2 62 ## 63 235 1954 3 63 ## 64 227 1954 4 64 ## 65 234 1954 5 65 ## 66 264 1954 6 66 ## 67 302 1954 7 67 ## 68 293 1954 8 68 ## 69 259 1954 9 69 ## 70 229 1954 10 70 ## 71 203 1954 11 71 ## 72 229 1954 12 72 ## 73 242 1955 1 73 ## 74 233 1955 2 74 ## 75 267 1955 3 75 ## 76 269 1955 4 76 ## 77 270 1955 5 77 ## 78 315 1955 6 78 ## 79 364 1955 7 79 ## 80 347 1955 8 80 ## 81 312 1955 9 81 ## 82 274 1955 10 82 ## 83 237 1955 11 83 ## 84 278 1955 12 84 ## 85 284 1956 1 85 ## 86 277 1956 2 86 ## 87 317 1956 3 87 ## 88 313 1956 4 88 ## 89 318 1956 5 89 ## 90 374 1956 6 90 ## 91 413 1956 7 91 ## 92 405 1956 8 92 ## 93 355 1956 9 93 ## 94 306 1956 10 94 ## 95 271 1956 11 95 ## 96 306 1956 12 96 ## 97 315 1957 1 97 ## 98 301 1957 2 98 ## 99 356 1957 3 99 ## 100 348 1957 4 100 ## 101 355 1957 5 101 ## 102 422 1957 6 102 ## 103 465 1957 7 103 ## 104 467 1957 8 104 ## 105 404 1957 9 105 ## 106 347 1957 10 106 ## 107 305 1957 11 107 ## 108 336 1957 12 108 ## 109 340 1958 1 109 ## 110 318 1958 2 110 ## 111 362 1958 3 111 ## 112 348 1958 4 112 ## 113 363 1958 5 113 ## 114 435 1958 6 114 ## 115 491 1958 7 115 ## 116 505 1958 8 116 ## 117 404 1958 9 117 ## 118 359 1958 10 118 ## 119 310 1958 11 119 ## 120 337 1958 12 120 ## 121 360 1959 1 121 ## 122 342 1959 2 122 ## 123 406 1959 3 123 ## 124 396 1959 4 124 ## 125 420 1959 5 125 ## 126 472 1959 6 126 ## 127 548 1959 7 127 ## 128 559 1959 8 128 ## 129 463 1959 9 129 ## 130 407 1959 10 130 ## 131 362 1959 11 131 ## 132 405 1959 12 132 ## 133 417 1960 1 133 ## 134 391 1960 2 134 ## 135 419 1960 3 135 ## 136 461 1960 4 136 ## 137 472 1960 5 137 ## 138 535 1960 6 138 ## 139 622 1960 7 139 ## 140 606 1960 8 140 ## 141 508 1960 9 141 ## 142 461 1960 10 142 ## 143 390 1960 11 143 ## 144 432 1960 12 144
plot(with(air, tapply(passengers, year, mean)) ~ I(1949:1960),
ylab = "Mean number of passengers", xlab = "Year", type = "b")
plot(with(air, tapply(passengers, month, mean)) ~ I(1:12),
ylab = "Mean number of passengers", xlab = "Month", type = "h")
(mod_air <- lm(passengers ~ year * month, data = air))
## ## Call: ## lm(formula = passengers ~ year * month, data = air) ## ## Coefficients: ## (Intercept) year month2 month3 month4 month5 month6 month7 ## -5.407e+04 2.779e+01 6.356e+03 2.129e+02 -3.118e+03 -7.275e+03 -1.786e+04 -2.994e+04 ## month8 month9 month10 month11 month12 year:month2 year:month3 year:month4 ## -2.936e+04 -1.232e+04 -5.237e+03 3.060e+03 -1.094e+03 -3.255e+00 -9.441e-02 1.608e+00 ## year:month5 year:month6 year:month7 year:month8 year:month9 year:month10 year:month11 year:month12 ## 3.738e+00 9.171e+00 1.537e+01 1.508e+01 6.336e+00 2.692e+00 -1.570e+00 5.699e-01
plot(residuals(mod_air), type = "b") abline(h = 0, lty = 2, col = "red")
lmtest::dwtest(mod_air)
## ## Durbin-Watson test ## ## data: mod_air ## DW = 0.35903, p-value < 2.2e-16 ## alternative hypothesis: true autocorrelation is greater than 0
acf(residuals(mod_air))
library(nlme) MAR1 <- corAR1(value = 0.5, form = ~ 1|year, fixed = FALSE) MAR1 <- Initialize(MAR1, data = air) round(corMatrix(MAR1)[["1950"]], 2)
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] ## [1,] 1.00 0.50 0.25 0.13 0.06 0.03 0.02 0.01 0.00 0.00 0.00 0.00 ## [2,] 0.50 1.00 0.50 0.25 0.13 0.06 0.03 0.02 0.01 0.00 0.00 0.00 ## [3,] 0.25 0.50 1.00 0.50 0.25 0.13 0.06 0.03 0.02 0.01 0.00 0.00 ## [4,] 0.13 0.25 0.50 1.00 0.50 0.25 0.13 0.06 0.03 0.02 0.01 0.00 ## [5,] 0.06 0.13 0.25 0.50 1.00 0.50 0.25 0.13 0.06 0.03 0.02 0.01 ## [6,] 0.03 0.06 0.13 0.25 0.50 1.00 0.50 0.25 0.13 0.06 0.03 0.02 ## [7,] 0.02 0.03 0.06 0.13 0.25 0.50 1.00 0.50 0.25 0.13 0.06 0.03 ## [8,] 0.01 0.02 0.03 0.06 0.13 0.25 0.50 1.00 0.50 0.25 0.13 0.06 ## [9,] 0.00 0.01 0.02 0.03 0.06 0.13 0.25 0.50 1.00 0.50 0.25 0.13 ## [10,] 0.00 0.00 0.01 0.02 0.03 0.06 0.13 0.25 0.50 1.00 0.50 0.25 ## [11,] 0.00 0.00 0.00 0.01 0.02 0.03 0.06 0.13 0.25 0.50 1.00 0.50 ## [12,] 0.00 0.00 0.00 0.00 0.01 0.02 0.03 0.06 0.13 0.25 0.50 1.00
(mod_air2 <- lme(passengers ~ month * year, random = ~ 1 | year, data = air,
correlation = MAR1, method = "REML")) ## confusing: correlation is modelled between months!
## Linear mixed-effects model fit by REML ## Data: air ## Log-restricted-likelihood: -477.871 ## Fixed: passengers ~ month * year ## (Intercept) month2 month3 month4 month5 month6 month7 month8 ## -5.406738e+04 6.355626e+03 2.129324e+02 -3.118268e+03 -7.275373e+03 -1.785545e+04 -2.993915e+04 -2.935851e+04 ## month9 month10 month11 month12 year month2:year month3:year month4:year ## -1.232239e+04 -5.237282e+03 3.059512e+03 -1.093845e+03 2.778671e+01 -3.255245e+00 -9.440559e-02 1.608392e+00 ## month5:year month6:year month7:year month8:year month9:year month10:year month11:year month12:year ## 3.737762e+00 9.171329e+00 1.537413e+01 1.507692e+01 6.335664e+00 2.692308e+00 -1.569930e+00 5.699301e-01 ## ## Random effects: ## Formula: ~1 | year ## (Intercept) Residual ## StdDev: 13.4979 8.34667 ## ## Correlation Structure: AR(1) ## Formula: ~1 | year ## Parameter estimate(s): ## Phi ## 0.3270032 ## Number of Observations: 144 ## Number of Groups: 12
(mod_air2b <- lme(passengers ~ month * year, random = ~ 1 | year, data = air,
correlation = corAR1(form = ~ 1|year), method = "REML"))
## Linear mixed-effects model fit by REML ## Data: air ## Log-restricted-likelihood: -477.871 ## Fixed: passengers ~ month * year ## (Intercept) month2 month3 month4 month5 month6 month7 month8 ## -5.406738e+04 6.355626e+03 2.129324e+02 -3.118268e+03 -7.275373e+03 -1.785545e+04 -2.993915e+04 -2.935851e+04 ## month9 month10 month11 month12 year month2:year month3:year month4:year ## -1.232239e+04 -5.237282e+03 3.059512e+03 -1.093845e+03 2.778671e+01 -3.255245e+00 -9.440559e-02 1.608392e+00 ## month5:year month6:year month7:year month8:year month9:year month10:year month11:year month12:year ## 3.737762e+00 9.171329e+00 1.537413e+01 1.507692e+01 6.335664e+00 2.692308e+00 -1.569930e+00 5.699301e-01 ## ## Random effects: ## Formula: ~1 | year ## (Intercept) Residual ## StdDev: 13.4979 8.346669 ## ## Correlation Structure: AR(1) ## Formula: ~1 | year ## Parameter estimate(s): ## Phi ## 0.3270031 ## Number of Observations: 144 ## Number of Groups: 12
mod_air3 <- lme(passengers ~ month * year, random = ~ 1 | year, data = air, method = "REML") anova(mod_air2, mod_air3)
## Model df AIC BIC logLik Test L.Ratio p-value ## mod_air2 1 27 1009.742 1085.004 -477.8710 ## mod_air3 2 26 1017.362 1089.837 -482.6811 1 vs 2 9.620272 0.0019
mod_airARMA1 <- update(mod_air2, correlation = corARMA(form = ~ 1 | year, p = 1, q = 0))
mod_airARMA2 <- update(mod_air2, correlation = corARMA(form = ~ 1 | year, p = 4, q = 0))
mod_airARMA3 <- update(mod_air2, correlation = corARMA(form = ~ 1 | year, p = 2, q = 2))
rbind(mod_air2 = AIC(mod_air2),
mod_airARMA1 = AIC(mod_airARMA1),
mod_airARMA2 = AIC(mod_airARMA2),
mod_airARMA3 = AIC(mod_airARMA3))
## [,1] ## mod_air2 1009.742 ## mod_airARMA1 1009.742 ## mod_airARMA2 1008.911 ## mod_airARMA3 1011.174
Note: do not compare AICs or likelihoods from nlme to those from other packages!
(it seems they have failed to consider a constant term…)
mod_air4 <- update(mod_air2, correlation = corARMA(form = ~ 1 | year, p = 4, q = 0), method = "ML") data.for.plot <- expand.grid(month = factor(1:12), year = 1949:1960) data.for.plot$obs <- air$passengers data.for.plot$time <- seq(1949, 1960, length = (1960 - 1949 + 1) * 12) data.for.plot$fit_lm <- predict(mod_air) data.for.plot$fit_lme <- predict(mod_air4)
plot(obs ~ time, data = data.for.plot, type = "l", ylim = c(0, 700), ylab = "Passengers") points(fit_lm ~ time, data = data.for.plot, type = "l", col = "red") points(fit_lme ~ time, data = data.for.plot, type = "l", col = "blue")
plot(residuals(mod_air4), type = "l") abline(h = 0, lty = 2, col = "red")
spaMMlibrary(spaMM) air$year_z <- scale(air$year) ## otherwise hard to fit! mod_air_spaMM1 <- fitme(passengers ~ month * year_z + AR1(1|time %in% year), data = air, method = "REML") mod_air_spaMM2 <- fitme(passengers ~ month * year_z, data = air, method = "REML") print(AIC(mod_air_spaMM1))
## marginal AIC: conditional AIC: dispersion AIC: effective df: ## 1064.94556 1002.89595 927.67544 80.80271
print(AIC(mod_air_spaMM2))
## marginal AIC: ## 1233.527
mod_air_spaMM1
## formula: passengers ~ month * year_z + AR1(1 | time %in% year)
## REML: Estimation of lambda, phi and corrPars by REML.
## Estimation of fixed effects by ML.
## Family: gaussian ( link = identity )
## ------------ Fixed effects (beta) ------------
## Estimate Cond. SE t-value
## (Intercept) 241.750 4.376 55.2491
## month2 -6.750 2.791 -2.4189
## month3 28.417 2.993 9.4934
## month4 25.333 3.176 7.9763
## month5 30.083 3.342 9.0005
## month6 69.917 3.495 20.0054
## month7 109.583 3.635 30.1428
## month8 109.333 3.766 29.0339
## month9 60.667 3.887 15.6082
## month10 24.833 4.000 6.2086
## month11 -8.917 4.106 -2.1718
## month12 20.083 4.205 4.7764
## year_z 96.256 4.391 21.9216
## month2:year_z -11.276 2.800 -4.0269
## month3:year_z -0.327 3.004 -0.1089
## month4:year_z 5.572 3.187 1.7481
## month5:year_z 12.948 3.354 3.8604
## month6:year_z 31.770 3.507 9.0589
## month7:year_z 53.258 3.648 14.5984
## month8:year_z 52.228 3.779 13.8211
## month9:year_z 21.947 3.900 5.6269
## month10:year_z 9.326 4.014 2.3236
## month11:year_z -5.438 4.120 -1.3200
## month12:year_z 1.974 4.219 0.4679
## --------------- Random effects ---------------
## Family: gaussian ( link = identity )
## --- Correlation parameters:
## 1.ARphi
## 0.9615589
## --- Variance parameters ('lambda'):
## lambda = var(u) for u ~ Gaussian;
## time %in%. : 190.3
## --- Coefficients for log(lambda):
## Group Term Estimate Cond.SE
## time %in%. (Intercept) 5.249 0.2259
## # of obs: 144; # of groups: time %in%., 144
## ------------- Residual variance -------------
## Coefficients for log(phi) ~ 1 :
## Estimate Cond. SE
## (Intercept) 3.674 0.1573
## Estimate of phi=residual var: 39.41
## ------------- Likelihood values -------------
## logLik
## p_v(h) (marginal L): -506.4728
## p_beta,v(h) (ReL): -461.8377
mod_air_spaMM1$corrPars[[1]]
## $ARphi ## [1] 0.9615589
res <- matrix(residuals(mod_air_spaMM2)[,1], ncol = 12) (res_autocorr <- sapply(1:11, function(month) cor(res[month, ], res[(month + 1), ]) ))
## [1] 0.9406611 0.6197766 0.6047561 0.9350238 0.7525524 0.8799989 0.8560590 0.8265226 0.9047632 0.9025485 0.9111548
summary(res_autocorr)
## Min. 1st Qu. Median Mean 3rd Qu. Max. ## 0.6048 0.7895 0.8800 0.8303 0.9080 0.9407
data.for.plot$pred_spaMM <- predict(mod_air_spaMM1) plot(obs ~ time, data = data.for.plot, type = "l", lwd = 3, ylim = c(0, 700), ylab = "Passengers") points(pred_spaMM ~ time, data = data.for.plot, type = "l", col = "green")
Note: never extrapolate using such model! The perfect fit is not unusual.
spaMMmod_air_spaMM2ML <- fitme(passengers ~ month*year_z + AR1(1|time %in% year), data = air, method = "ML") mod_air_no_year <- fitme(passengers ~ month + AR1(1|time %in% year), data = air, method = "ML")
## (This warning will show only once) ## Low (<1e-12) fitted residual variance (phi): this may be a genuine result ## for data without appropriate replicates and a model that allows overfitting, but ## (1) this may also point to problems in the data (duplicated response values?); ## (2) this may crash later computations. You may overcome this by setting ## control.HLfit$min_phi to 1e-10 or some other low, but not too low, value. ## Still, the computed likelihood maximum wrt all parameters may be inaccurate.
## Warning in spaMM::HLfit_body(init.HLfit = list(), ranFix = structure(list(:
anova(mod_air_spaMM2ML, mod_air_no_year)
## chi2_LR df p_value ## p_v 636.4098 12 0
c(logLik(mod_air_spaMM2ML), logLik(mod_air_no_year))
## p_v p_v ## -505.3456 -823.5505
nlmemod_air3ML <- lme(passengers ~ month * year, random = ~ 1 | time, data = air,
correlation = corAR1(), method = "ML")
mod_air_no_year2 <- lme(passengers ~ month, random = ~ 1 | time, data = air,
correlation = corAR1(), method = "ML")
anova(mod_air3ML, mod_air_no_year2)
## Model df AIC BIC logLik Test L.Ratio p-value ## mod_air3ML 1 27 1235.272 1315.457 -590.6362 ## mod_air_no_year2 2 15 1800.215 1844.762 -885.1072 1 vs 2 588.9421 <.0001
nlme::BodyWeight datasetdata("BodyWeight", package = "nlme")
plot(BodyWeight)
nlme::BodyWeight datasetbody <- as.data.frame(BodyWeight) body$Rat <- factor(body$Rat, levels = 1:16, order = FALSE) str(body)
## 'data.frame': 176 obs. of 4 variables: ## $ weight: num 240 250 255 260 262 258 266 266 265 272 ... ## $ Time : num 1 8 15 22 29 36 43 44 50 57 ... ## $ Rat : Factor w/ 16 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ... ## $ Diet : Factor w/ 3 levels "1","2","3": 1 1 1 1 1 1 1 1 1 1 ...
unique(body$Time)
## [1] 1 8 15 22 29 36 43 44 50 57 64
(mod_rat1 <- lme(weight ~ Diet * Time, random = ~ Time|Rat, data = body))
## Linear mixed-effects model fit by REML ## Data: body ## Log-restricted-likelihood: -575.8599 ## Fixed: weight ~ Diet * Time ## (Intercept) Diet2 Diet3 Time Diet2:Time Diet3:Time ## 251.6516516 200.6654865 252.0716778 0.3596391 0.6058392 0.2983375 ## ## Random effects: ## Formula: ~Time | Rat ## Structure: General positive-definite, Log-Cholesky parametrization ## StdDev Corr ## (Intercept) 36.9390723 (Intr) ## Time 0.2484113 -0.149 ## Residual 4.4436052 ## ## Number of Observations: 176 ## Number of Groups: 16
plot(mod_rat1) ## there is some homoscedasticity but we will ignore it for now
plot(residuals(mod_rat1), type = "b")
(mod_rat2 <- lme(weight ~ Diet * Time, random = ~ Time|Rat, correlation = corExp(form = ~ Time), data = body))
## Linear mixed-effects model fit by REML ## Data: body ## Log-restricted-likelihood: -566.0296 ## Fixed: weight ~ Diet * Time ## (Intercept) Diet2 Diet3 Time Diet2:Time Diet3:Time ## 251.5924463 200.6889077 252.3152061 0.3602961 0.6255177 0.3109452 ## ## Random effects: ## Formula: ~Time | Rat ## Structure: General positive-definite, Log-Cholesky parametrization ## StdDev Corr ## (Intercept) 36.9744887 (Intr) ## Time 0.2434607 -0.149 ## Residual 4.5501283 ## ## Correlation Structure: Exponential spatial correlation ## Formula: ~Time | Rat ## Parameter estimate(s): ## range ## 3.650519 ## Number of Observations: 176 ## Number of Groups: 16
anova(mod_rat1, mod_rat2)
## Model df AIC BIC logLik Test L.Ratio p-value ## mod_rat1 1 10 1171.720 1203.078 -575.8599 ## mod_rat2 2 11 1154.059 1188.553 -566.0296 1 vs 2 19.66057 <.0001
Note: the comparison makes sense as models are nested and fitted with REML.
spaMM(mod_rat_spaMM <- fitme(weight ~ Diet * Time + (Time|Rat) + Matern(1|Time),
fixed = list(nu = 0.5),
data = body, method = "REML"))
## formula: weight ~ Diet * Time + (Time | Rat) + Matern(1 | Time)
## REML: Estimation of lambda, phi, corrPars and ranCoefs by REML.
## Estimation of fixed effects by ML.
## Family: gaussian ( link = identity )
## ------------ Fixed effects (beta) ------------
## Estimate Cond. SE t-value
## (Intercept) 251.6434 13.15459 19.130
## Diet2 200.6655 22.67951 8.848
## Diet3 252.0717 22.67951 11.115
## Time 0.3682 0.09668 3.808
## Diet2:Time 0.6058 0.15786 3.838
## Diet3:Time 0.2983 0.15786 1.890
## --------------- Random effects ---------------
## Family: gaussian ( link = identity )
## --- Correlation parameters:
## 2.nu 2.rho
## 0.5000000 0.1695024
## --- Random-coefficients Cov matrices:
## Group Term Var. Corr.
## Rat (Intercept) 1366
## Rat Time 0.0624 -0.1507
## --- Variance parameters ('lambda'):
## lambda = var(u) for u ~ Gaussian;
## Time : 2.844
## --- Coefficients for log(lambda):
## Group Term Estimate Cond.SE
## Time (Intercept) 1.045 0.5868
## # of obs: 176; # of groups: Rat, 32; Time, 11
## ------------- Residual variance -------------
## Coefficients for log(phi) ~ 1 :
## Estimate Cond. SE
## (Intercept) 2.826 0.12
## Estimate of phi=residual var: 16.88
## ------------- Likelihood values -------------
## logLik
## p_v(h) (marginal L): -577.0637
## p_beta,v(h) (ReL): -569.5571
mod_rat_spaMM_AR1 <- fitme(weight ~ Diet * Time + (Time|Rat) + AR1(1|Time),
fixed = list(nu = 0.5),
data = body, method = "REML")
logLik(mod_rat_spaMM)
## p_bv ## -569.5571
logLik(mod_rat_spaMM_AR1)
## p_bv ## -569.5571
Note 1: AR1 and Matern (with nu = 0.5) are equivalent if time is discrete!
Note 2: AR1 cannot work if time is not discrete!
nlme vs spaMMplot(predict(mod_rat_spaMM), predict(mod_rat2)) abline(0, 1, col = "red")
mod_rat_spaMM2 <- fitme(weight ~ Diet * Time + (Time|Rat), data = body,
method = "REML")
print(AIC(mod_rat_spaMM))
## marginal AIC: conditional AIC: dispersion AIC: effective df: ## 1174.1275 1035.8757 1147.1142 139.0042
print(AIC(mod_rat_spaMM2))
## marginal AIC: conditional AIC: dispersion AIC: effective df: ## 1180.5026 1057.5118 1153.7197 144.9506
spaMMmod_rat_spaMM3ML <- fitme(weight ~ Diet * Time + (Time|Rat) + Matern(1|Time),
fixed = list(nu = 0.5), data = body, method = "ML")
mod_rat_no_diet <- fitme(weight ~ 1 + Time + (Time|Rat) + Matern(1|Time),
fixed = list(nu = 0.5), data = body, method = "ML")
anova(mod_rat_spaMM3ML, mod_rat_no_diet)
## chi2_LR df p_value ## p_v 47.77998 4 1.048901e-09
c(logLik(mod_rat_spaMM3ML), logLik(mod_rat_no_diet))
## p_v p_v ## -576.7546 -600.6446
nlmemod_rat3ML <- lme(weight ~ Diet * Time, random = ~ Time|Rat,
correlation = corExp(form = ~ Time, nugget = TRUE), data = body, method = "ML")
mod_rat_no_diet2 <- lme(weight ~ 1 + Time, random = ~ Time|Rat,
correlation = corExp(form = ~ Time, nugget = TRUE), data = body, method = "ML")
anova(mod_rat3ML, mod_rat_no_diet2)
## Model df AIC BIC logLik Test L.Ratio p-value ## mod_rat3ML 1 12 1165.962 1204.007 -570.9808 ## mod_rat_no_diet2 2 8 1206.618 1231.982 -595.3088 1 vs 2 48.65601 <.0001
There are more complex autocorrelation functions out there.
spaMM:
mod_rat_Matern <- fitme(weight ~ Diet * Time + (Time|Rat) + Matern(1|Time), data = body) c(logLik(mod_rat_spaMM)[[1]], logLik(mod_rat_Matern)[[1]])
## [1] -569.5571 -576.5687
nlme:
mod_rat_corExp <- update(mod_rat1, corr = corExp(form = ~ Time, nugget = TRUE)) mod_rat_corRatio <- update(mod_rat1, corr = corRatio(form = ~ Time)) mod_rat_corSpher <- update(mod_rat1, corr = corSpher(form = ~ Time)) mod_rat_corLin <- update(mod_rat1, corr = corLin(form = ~ Time)) mod_rat_corGaus <- update(mod_rat1, corr = corGaus(form = ~ Time)) c(logLik(mod_rat2)[[1]], logLik(mod_rat_corExp)[[1]], logLik(mod_rat_corRatio)[[1]], logLik(mod_rat_corSpher)[[1]], logLik(mod_rat_corLin)[[1]], logLik(mod_rat_corGaus)[[1]])
## [1] -566.0296 -563.8841 -566.8747 -567.6167 -567.6167 -567.6167
Note: do not compare likelihood across packages!
data("Loaloa")
ndvi <- Loaloa[, c("maxNDVI", "latitude", "longitude")]
head(ndvi)
## maxNDVI latitude longitude ## X1 0.69 5.736750 8.041860 ## X2 0.74 5.680280 8.004330 ## X3 0.79 5.347222 8.905556 ## X4 0.67 5.917420 8.100720 ## X5 0.85 5.104540 8.182510 ## X6 0.80 5.355556 8.929167
library(maps)
spaMMplot2D(x = ndvi$longitude, y = ndvi$latitude, z = ndvi$maxNDVI, add.map = TRUE,
xlab = "Longitude", ylab = "Latitude", plot.title = title(main = "max NDVI"))
pairs(ndvi)
(mod_ndvi1 <- fitme(maxNDVI ~ 1 + Matern(1|longitude + latitude), data = ndvi, method = "REML"))
## formula: maxNDVI ~ 1 + Matern(1 | longitude + latitude)
## REML: Estimation of lambda, phi and corrPars by REML.
## Estimation of fixed effects by ML.
## Family: gaussian ( link = identity )
## ------------ Fixed effects (beta) ------------
## Estimate Cond. SE t-value
## (Intercept) 0.8006 0.02391 33.48
## --------------- Random effects ---------------
## Family: gaussian ( link = identity )
## --- Correlation parameters:
## 1.nu 1.rho
## 0.4066726 0.9345780
## --- Variance parameters ('lambda'):
## lambda = var(u) for u ~ Gaussian;
## longitude. : 0.004322
## --- Coefficients for log(lambda):
## Group Term Estimate Cond.SE
## longitude. (Intercept) -5.444 0.1229
## # of obs: 197; # of groups: longitude., 197
## ------------- Residual variance -------------
## Coefficients for log(phi) ~ 1 :
## Estimate Cond. SE
## (Intercept) -8.219 0.1775
## Estimate of phi=residual var: 0.0002696
## ------------- Likelihood values -------------
## logLik
## p_v(h) (marginal L): 378.1405
## p_beta,v(h) (ReL): 375.3261
mapMM(mod_ndvi1, add.map = TRUE, plot.title = title(xlab = "Longitude", ylab = "Latitude"))
filled.mapMM(mod_ndvi1, add.map = TRUE, plot.title = title(xlab = "Longitude", ylab = "Latitude"))
x.for.pred <- seq(min(ndvi$longitude), max(ndvi$longitude), length.out = 100) y.for.pred <- seq(min(ndvi$latitude), max(ndvi$latitude), length.out = 50) data.for.pred <- expand.grid(longitude = x.for.pred, latitude = y.for.pred) gridpred <- predict(mod_ndvi1, newdata = data.for.pred, variances = list(predVar = TRUE)) data.for.pred$predVar <- attr(gridpred, "predVar") m <- matrix(data.for.pred$predVar, ncol = length(y.for.pred))
spaMM.filled.contour(x = x.for.pred, y = y.for.pred, z = m, plot.axes = {
points(ndvi[, c("longitude", "latitude")])}, col = spaMM.colors(redshift = 3))
We used random effects to model variance components from random variables.
We can thus use the same kind of tools to also model the variance components of the errors.
Do not mix-up random variance components from residual ones: remember that in the error, all covariance terms are null (by definition).
mod_rat_spaMM <- fitme(weight ~ Diet * Time + (Time|Rat) + AR1(1|Time), data = body,
method = "REML")
coplot(residuals(mod_rat_spaMM) ~ I(1:nrow(body)) | body$Diet, show.given = FALSE)
mod_rat_hetero <- fitme(weight ~ Diet * Time + (Time|Rat) + AR1(1|Time), data = body,
method = "REML", resid.model = ~ Diet)
summary.tables <- summary(mod_rat_hetero)
summary.tables$phi_table
## Estimate Cond. SE ## (Intercept) 2.6702199 0.1698757 ## Diet2 -0.3221747 0.2961120 ## Diet3 0.6638038 0.2918609
print(rbind(AIC(mod_rat_spaMM),
AIC(mod_rat_hetero)))
## marginal AIC: conditional AIC: dispersion AIC: effective df: ## [1,] 1174.127 1035.876 1147.114 139.0042 ## [2,] 1168.732 1027.644 1141.668 138.8131
mod_rat_hetero <- fitme(weight ~ Diet * Time + (Time|Rat) + AR1(1|Time), data = body,
method = "ML", resid.model = ~ Diet)
mod_rat_hetero0 <- fitme(weight ~ Time + (Time|Rat) + AR1(1|Time), data = body,
method = "ML", resid.model = ~ Diet)
anova(mod_rat_hetero, mod_rat_hetero0)
## chi2_LR df p_value ## p_v 47.73446 4 1.072063e-09
set.seed(1L)
d <- data.frame(y = c(rnorm(100, mean = 10, sd = sqrt(10)),
rnorm(100, mean = 20, sd = sqrt(20))),
group = factor(c(rep("A", 100), rep("B", 100))))
m <- fitme(y ~ group, resid.model = ~ group, data = d, method = "REML")
unique(m$phi)
## [1] 8.067621 18.350646